library(devtools) # Reproducibility (see end of file)
library(phyloseq) # Easier data manipulation
library(tidyverse) # Pretty plotting and data manipulation
library(forcats) # Recoding factors
library(cowplot) # Multiple plotting
library(d3heatmap) # For nice heatmaps
library(phytools) # Working with phylogenetic trees
library(ggtree) # Pretty ggplot-esque phylogenetic trees
library(ape) # Working with phylogenetic trees
library(DT) # Pretty Tables
library(gplots) # Heatmap.2 function
library(stringr) # To replace zeros in OTU names
library(adephylo) # Testing phylogenetic autocorrelation
library(geiger) # Phylogenetic signal
source("code/set_colors.R") # Set Colors for plotting
# Set some global ggplot parameters
mytheme <- theme(legend.text = element_text(size = 8), legend.title = element_text(size = 8, face = "bold"),
plot.title = element_text(size = 10), legend.position = c(0.01, 0.9),
axis.title = element_text(size = 10, face = "bold"), axis.text = element_text(size = 8),
legend.key.width=unit(0.1,"line"), legend.key.height=unit(0.1,"line"))
# Read in the data
raw_data <- read.table(file="data/Chloroplasts_removed/old/nochloro_HNA_LNA.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
# Subset out only the Muskegon and Surface samples
muskegon_data <- raw_data %>%
dplyr::filter(Lake == "Muskegon" & Depth == "Surface") %>%
dplyr::select(norep_filter_name)
# Load in the productivity data
production <- read.csv(file="data/production_data.csv", header = TRUE) %>%
dplyr::filter(fraction == "Free" & limnion == "Surface") %>% # Select only rows that are free-living
dplyr::select(names, tot_bacprod, SD_tot_bacprod) %>% # Select relevant columns for total productivity
mutate(tot_bacprod = round(tot_bacprod, digits = 2), # Round to 2 decimals
SD_tot_bacprod = round(SD_tot_bacprod, digits = 2) ) %>%
dplyr::rename(norep_filter_name = names) %>% # Rename to match other data frame
arrange(norep_filter_name)
# Stop if the names do not match
stopifnot(muskegon_data$norep_filter_name == production$norep_filter_name)
# Combine the two muskegon data frames into one
combined_data <- left_join(muskegon_data, production, by = "norep_filter_name")
# Merge the combined data back into the original data frame (data)
data <- left_join(raw_data, combined_data, by = "norep_filter_name") %>%
dplyr::select(-c(Platform, Fraction)) %>%
mutate(Lake = factor(Lake, levels = c("Michigan","Inland", "Muskegon")))
head(data)
## samples Total.cells HNA.cells LNA.cells Total.count.sd HNA.sd LNA.sd Lake Sample_16S Season Month Year Site Depth Total_Sequences norep_filter_name tot_bacprod SD_tot_bacprod
## 1 110D2-115-2 620420.9 139880.7 480540.2 1640.275 4639.952 5795.307 Michigan 110D2F115 Winter Januari 2015 MM110 Deep 19654 110DF115 NA NA
## 2 110D2-415-2 799314.0 155198.1 644115.9 9388.845 4198.741 5493.442 Michigan 110D2F415 Spring April 2015 MM110 Deep 7951 110DF415 NA NA
## 3 110D2-515 1261369.4 362443.6 898925.9 15341.779 10504.696 6299.495 Michigan 110D2F515 Spring May 2015 MM110 Deep 16293 110DF515 NA NA
## 4 110D2-715-2 542723.6 131132.1 411591.5 6139.696 2196.008 3967.958 Michigan 110D2F715 Summer July 2015 MM110 Deep 16882 110DF715 NA NA
## 5 110D2-815 548027.9 131820.5 416207.4 3552.010 3805.294 2441.935 Michigan 110D2F815 Summer August 2015 MM110 Deep 22157 110DF815 NA NA
## 6 110D2-915-2 731931.0 189746.2 542184.8 36726.396 6634.230 30473.925 Michigan 110D2F915 Fall September 2015 MM110 Deep 16500 110DF915 NA NA
# Fix the metadata
data$Month[data$Month == "Januari"] <- "June"
data$Season[data$Season == "Winter"] <- "Summer"
data$Lake[data$Site == "MLB"] <- "Muskegon"
# Write out the file
#write.table(data, file="data/Chloroplasts_removed/productivity_data.tsv", row.names=TRUE)
####### HELPFUL TRANSFORMED AND SUBSETTED DATA
##### Subset Muskegon Lake Data Only
muskegon <- dplyr::filter(data, Lake == "Muskegon" & Depth == "Surface") %>%
mutate(Site = factor(Site, levels = c("MOT", "MDP", "MBR", "MIN"))) %>%
filter(tot_bacprod < 90)
# Gather the data for summary statistics
df_cells <- data %>%
dplyr::select(1:4, Lake, Season:Depth) %>%
rename(Total = Total.cells, HNA = HNA.cells, LNA = LNA.cells) %>%
gather(key = FCM_type, value = num_cells, Total:LNA)
# Number of cells in HNA and LNA across all samples
# For a plot of these values, see figure 1 below
df_cells %>%
group_by(FCM_type) %>%
summarise(num_samples = n(),
mean_cells = round(mean(num_cells), digits = 2),
sd_mean_cells = round(sd(num_cells), digits = 2),
median_cells = round(median(num_cells), digits = 2)) %>%
datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)
# Are there more total cells in one lake over the other?
totcells_df <- df_cells %>%
filter(FCM_type == "Total")
# Compute the analysis of variance
totcells_aov <- aov(num_cells ~ Lake, data = totcells_df)
summary(totcells_aov) # there is a difference, but which lake?
## Df Sum Sq Mean Sq F value Pr(>F)
## Lake 2 6.995e+14 3.498e+14 83.84 <2e-16 ***
## Residuals 170 7.092e+14 4.172e+12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Which samples are significant from each other?
TukeyHSD(totcells_aov) # Michigan is significantly different form Muskegon and Inland
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = num_cells ~ Lake, data = totcells_df)
##
## $Lake
## diff lwr upr p adj
## Inland-Michigan 4581227.7 3658141 5504314.6 0.0000000
## Muskegon-Michigan 4332278.2 3409191 5255365.0 0.0000000
## Muskegon-Inland -248949.5 -1116299 618399.9 0.7762399
# Number of cells in HNA and LNA per ecosystem
df_cells %>%
group_by(Lake, FCM_type) %>%
summarise(num_samples = n(),
mean_cells = round(mean(num_cells), digits = 2),
median_cells = round(median(num_cells), digits = 2)) %>%
datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)
# Proportion of HNA and LNA across all samples
df_cells %>%
dplyr::select(samples, Lake, FCM_type, num_cells) %>%
spread(FCM_type, num_cells) %>%
mutate(prop_HNA = HNA/Total * 100,
prop_LNA = LNA/Total * 100) %>%
dplyr::select(Lake, prop_HNA, prop_LNA) %>%
summarize(mean_HNA = round(mean(prop_HNA), digits = 2),
sd_HNA = round(sd(prop_HNA), digits = 2),
mean_LNA = round(mean(prop_LNA), digits = 2),
sd_LNA = round(sd(prop_LNA), digits = 2))
## mean_HNA sd_HNA mean_LNA sd_LNA
## 1 30.41 9.06 69.59 9.06
# Proportion of HNA and LNAper ecosystem
prop_stats <- df_cells %>%
dplyr::select(samples, Lake, FCM_type, num_cells) %>%
spread(FCM_type, num_cells) %>%
mutate(prop_HNA = HNA/Total * 100,
prop_LNA = LNA/Total * 100) %>%
dplyr::select(Lake, prop_HNA, prop_LNA) %>%
rename(HNA = prop_HNA, LNA = prop_LNA) %>%
group_by(Lake) %>%
summarize(mean_HNA = round(mean(HNA), digits = 2),
mean_LNA = round(mean(LNA), digits = 2),
min_HNA = round(min(HNA), digits = 2),
max_HNA = round(max(HNA), digits = 2),
min_LNA = round(min(LNA), digits = 2),
max_LNA = round(max(LNA), digits = 2),
num_samples = n())
datatable(prop_stats, caption = "Statistics of the percentage of each flow cytometry group across the systems", rownames = FALSE)
# Load in the data from each lake
musk_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/muskegon/muskegon_sampledata_5in10.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
mich_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/michigan/michigan_sampledata_5in10.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
inla_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/inland/inland_sampledata_5in10.tsv", header = TRUE) %>%
mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
arrange(norep_filter_name)
stopifnot(colnames(musk_all_df) == colnames(mich_all_df))
stopifnot(colnames(musk_all_df) == colnames(inla_all_df))
lakes <- bind_rows(musk_all_df, mich_all_df, inla_all_df)
ggplot(df_cells, aes(x = FCM_type, y = num_cells, fill = Lake, color = Lake, shape = Lake)) +
geom_point(size = 1.5, position = position_jitterdodge(), color = "black") +
geom_boxplot(alpha = 0.7, outlier.shape = NA, show.legend = FALSE, color = "black") +
scale_color_manual(values = lake_colors, guide = "none") +
scale_fill_manual(values = lake_colors) +
scale_shape_manual(values = lake_shapes) +
labs(y = "Number of Cells (cells/mL)", x = "FCM Type") +
mytheme + theme(legend.title = element_blank(), legend.position = c(0.01, 0.95)) +
guides(colour = guide_legend(override.aes = list(size=2.5)),
shape = guide_legend(override.aes = list(size=2.5)),
fill = guide_legend(override.aes = list(size=2.5)))
######################## Analysis of HNA/LNA/Total Cells vs Total Productivity
# 1. Run the linear model
lm_HNA <- lm(tot_bacprod ~ HNA.cells, data = muskegon)
## 2. Extract the R2 and p-value from the linear model:
lm_HNA_stats <- paste("atop(R^2 ==", round(summary(lm_HNA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_HNA)$coefficients[,4][2]), digits = 5), ")")
# 3. Plot it
HNA_vs_prod <- ggplot(muskegon, aes(x = HNA.cells, y = tot_bacprod)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) +
geom_point(size = 2, shape = 22, fill = "deepskyblue4") +
geom_smooth(method = "lm", color = "deepskyblue4") +
labs(y = "Bacterial Production \n (μg C/L/day)", x = "HNA Cell Density \n(cells/mL)") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE),
breaks = c(2e+06, 3e+06)) +
annotate("text", x=1.65e+06, y=60, label=lm_HNA_stats, parse = TRUE, color = "black", size = 3) +
mytheme
# 1. Run the linear model
lm_LNA <- lm(tot_bacprod ~ LNA.cells, data = muskegon)
## 2. Extract the R2 and p-value from the linear model:
lm_LNA_stats <- paste("atop(R^2 ==", round(summary(lm_LNA)$adj.r.squared, digits = 3), ",",
"p ==", round(unname(summary(lm_LNA)$coefficients[,4][2]), digits = 2), ")")
# 3. Plot it
LNA_vs_prod <- ggplot(muskegon, aes(x = LNA.cells, y = tot_bacprod)) +
geom_errorbarh(aes(xmin = LNA.cells - LNA.sd, xmax = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) +
geom_point(size = 2.5, shape = 22, fill = "darkgoldenrod1") +
labs(y = "Bacterial Production \n (μg C/L/day)", x = "LNA Cell Density \n(cells/mL)") +
geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "darkgoldenrod1") +
annotate("text", x=2.75e+06, y=60, label=lm_LNA_stats, parse = TRUE, color = "red", size = 3) +
mytheme
# 1. Run the linear model
lm_total <- lm(tot_bacprod ~ Total.cells, data = muskegon)
## 2. Extract the R2 and p-value from the linear model:
lm_total_stats <- paste("atop(R^2 ==", round(summary(lm_total)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_total)$coefficients[,4][2]), digits = 2), ")")
# 3. Plot it
Total_vs_prod <- ggplot(muskegon, aes(x = Total.cells, y = tot_bacprod)) +
geom_errorbarh(aes(xmin = Total.cells - Total.count.sd, xmax = Total.cells + Total.count.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) +
scale_shape_manual(values = lake_shapes) +
geom_point(size = 2.5, shape = 22, fill = "black") +
labs(y = "Bacterial Production \n (μg C/L/day)", x = "Total Cell Density \n(cells/mL)") +
geom_smooth(method = "lm", color = "black") +
#geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "red") +
annotate("text", x=5.25e+06, y=60, label=lm_total_stats, parse = TRUE, color = "black", size = 3) +
mytheme
###### ###### ###### ###### ###### ###### ###### ###### ###### ######
###### Correlation between HNA and LNA Across the three systems ######
## Is there a corrlation between HNA and LNA across ecosystems?
# 1. Run the linear model
lm_allNA_corr <- lm(LNA.cells ~ HNA.cells, data = lakes)
summary(lm(LNA.cells ~ HNA.cells * Lake, data = lakes))
##
## Call:
## lm(formula = LNA.cells ~ HNA.cells * Lake, data = lakes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3651006 -558525 -113725 511977 3997810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.699e+06 3.590e+05 10.304 < 2e-16 ***
## HNA.cells 3.827e-01 1.704e-01 2.246 0.026 *
## LakeMichigan -2.995e+06 4.523e+05 -6.622 4.63e-10 ***
## LakeMuskegon -2.363e+06 5.411e+05 -4.367 2.21e-05 ***
## HNA.cells:LakeMichigan 5.404e-01 4.257e-01 1.269 0.206
## HNA.cells:LakeMuskegon 1.027e+00 2.549e-01 4.029 8.50e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1301000 on 167 degrees of freedom
## Multiple R-squared: 0.6116, Adjusted R-squared: 0.6
## F-statistic: 52.6 on 5 and 167 DF, p-value: < 2.2e-16
## 2. Extract the R2 and p-value from the linear model:
lm_allNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_allNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_allNA_corr)$coefficients[,4][2]), digits = 24), ")")
# 3. Plot it
p2 <- ggplot(lakes, aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) +
geom_point(size = 2.5, alpha = 0.9, aes(fill = Lake, shape = Lake)) +
scale_fill_manual(values = lake_colors) +
scale_shape_manual(values = lake_shapes) +
geom_smooth(method = "lm", color = "black") +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
annotate("text", x=5e+06, y=0.8e+06, label=lm_allNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme + theme(legend.position = "none") #theme(legend.title = element_blank(), legend.position = c(0.01, 0.95))
###### MAKE THE PLOT
## Extract legends for plotting
plot_for_legend <-
ggplot(df_cells, aes(x = Lake, y = num_cells, fill = FCM_type, color = FCM_type)) +
geom_point(size = 1, shape = 22, position = position_jitterdodge()) +
scale_fill_manual(values = fcm_colors) +
scale_color_manual(values = fcm_colors, guide = "none") +
scale_shape_manual(values = 22) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
labs(y = "Number of Cells \n (cells/mL)", x = "Lake") +
theme(legend.position = "right",
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 12),
legend.key.width=unit(1,"line"),
legend.key.height=unit(1,"line")) +
guides(fill = guide_legend(override.aes = list(size=3.5)))
legend1 <- get_legend(plot_for_legend)
legend2 <- get_legend(p2 + theme(legend.position = "right",
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 12),
legend.key.width=unit(1,"line"),
legend.key.height=unit(1,"line")) +
guides(fill = guide_legend(override.aes = list(size=3.5))))
# place the legends
legend_positions <- plot_grid(legend1, legend2, nrow = 2, ncol = 1)
# Place the legends next to figure 1A
row1 <- plot_grid(NULL, p2, legend_positions,
labels = c("", "A", ""),
ncol = 3, nrow = 1,
rel_widths = c(0.5, 1, 0.5))
# Add Figures 1B, 1C, and 1D
row2 <- plot_grid(HNA_vs_prod + theme(legend.position = "none"),
LNA_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
Total_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
labels = c("B", "C", "D"),
ncol = 3, nrow = 1,
rel_widths = c(0.95, 0.8, 0.8))
# Final Figure 1
plot_grid(row1, row2,
nrow = 2, ncol = 1)
### MUSKEGON ONLY ANALYSIS
# 1. Run the linear model
lm_muskNA_corr <- lm(LNA.cells ~ HNA.cells, data = musk_all_df)
## 2. Extract the R2 and p-value from the linear model:
lm_muskNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_muskNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_muskNA_corr)$coefficients[,4][2]), digits = 9), ")")
musk_corr_plot <- ggplot(musk_all_df, aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") +
geom_point(size = 3, shape = 22, aes(fill = Lake)) +
geom_smooth(method = "lm", color = "black") +
ggtitle("Muskegon Lake") + scale_fill_manual(values = lake_colors) +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
annotate("text", x=5e+06, y=0.8e+06, label=lm_muskNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme
### INLAND ONLY ANALYSIS
# 1. Run the linear model
lm_inlaNA_corr <- lm(LNA.cells ~ HNA.cells, data = filter(inla_all_df, Sample_16S != "Z14003F"))
## 2. Extract the R2 and p-value from the linear model:
lm_inlaNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_inlaNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_inlaNA_corr)$coefficients[,4][2]), digits = 2), ")")
inla_corr_plot <- ggplot(filter(inla_all_df, Sample_16S != "Z14003F"), aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") +
geom_point(size = 3, shape = 22, aes(fill = Lake)) +
geom_smooth(method = "lm", color = "black") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
ggtitle("Inland Lakes") + scale_fill_manual(values = lake_colors) +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
annotate("text", x=5e+06, y=0.8e+06, label=lm_inlaNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme
### MICHIGAN ONLY ANALYSIS
# 1. Run the linear model
lm_michNA_corr <- lm(LNA.cells ~ HNA.cells, data = mich_all_df)
# Without the Lake Michigan outlier!
summary(lm(LNA.cells ~ HNA.cells, data = filter(mich_all_df, Sample_16S != "M15S2F515")))
##
## Call:
## lm(formula = LNA.cells ~ HNA.cells, data = filter(mich_all_df,
## Sample_16S != "M15S2F515"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -599787 -239620 -95094 241831 987737
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.832e+05 9.793e+04 5.956 3.37e-07 ***
## HNA.cells 1.200e+00 1.797e-01 6.678 2.78e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 354600 on 46 degrees of freedom
## Multiple R-squared: 0.4922, Adjusted R-squared: 0.4812
## F-statistic: 44.59 on 1 and 46 DF, p-value: 2.778e-08
## 2. Extract the R2 and p-value from the linear model:
lm_michNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_michNA_corr)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_michNA_corr)$coefficients[,4][2]), digits = 11), ")")
mich_corr_plot <- ggplot(mich_all_df, aes(x = HNA.cells, y = LNA.cells)) +
geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") +
geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") +
geom_point(size = 3, shape = 22, aes(fill = Lake)) +
geom_smooth(method = "lm", color = "black") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
ggtitle("Lake Michigan") + scale_fill_manual(values = lake_colors) +
labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
annotate("text", x=5e+06, y=0.8e+06, label=lm_michNA_corr_stats, parse = TRUE, color = "black", size = 3) +
mytheme
plot_grid(mich_corr_plot, inla_corr_plot, musk_corr_plot,
labels = c("A", "B", "C"), nrow = 1, ncol = 3)
# Which sample is the outlier? And how does it compare from the other top 3 samples?
mich_all_df %>% arrange(-Total.cells) %>% head(n=3)
## samples Total.cells HNA.cells LNA.cells Total.count.sd HNA.sd LNA.sd Platform Lake Sample_16S Season Month Year Fraction Site Depth Total_Sequences norep_filter_name
## 1 M15S2-515-DMSO 6426699 3182154.2 3244545 80936.74 42003.28 58941.66 Accuri Michigan M15S2F515 Spring May 2015 Free MM15 Surface 14860 M15SF515
## 2 MM15-D-D_April 3019947 1329590.7 1690357 35081.79 25818.56 25525.35 Accuri Michigan Sp13.BD.MM15.DD.1 Spring April 2013 Free MM15 Deep 35392 Sp13BD.M
## 3 MM15-S-N_July 2963173 632722.9 2330450 37604.70 16918.47 27240.06 Accuri Michigan Su13.BD.MM15.SN.1 Summer July 2013 Free MM15 Surface 35065 Su13BD.M
## Plot the fraction of HNA
fmusk <- muskegon %>%
mutate(fHNA = HNA.cells/Total.cells,
fLNA = LNA.cells/Total.cells)
lm_fHNA <- lm(tot_bacprod ~ fHNA, data = fmusk)
## 2. Extract the R2 and p-value from the linear model:
lm_fHNA_stats <- paste("atop(R^2 ==", round(summary(lm_fHNA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_fHNA)$coefficients[,4][2]), digits = 3), ")")
fHNA_vs_prod <- ggplot(fmusk, aes(x = fHNA, y = tot_bacprod)) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") +
geom_point(size = 3, aes(shape = Lake), fill = "deepskyblue4") +
geom_smooth(method = "lm", linetype = "longdash", color = "deepskyblue4") +
scale_x_continuous(limits = c(0.15, 0.9), breaks = seq(0.2, 0.9, by = 0.2)) +
ylab("Bacterial Production") + xlab("Fraction HNA") +
scale_shape_manual(values = lake_shapes) +
annotate("text", x= 0.22, y=60, label=lm_fHNA_stats, parse = TRUE, color = "black", size = 3) +
mytheme + theme(legend.position = "none")
### LNA Fraction
## Plot the fraction of HNA
lm_fLNA <- lm(tot_bacprod ~ fLNA, data = fmusk)
## 2. Extract the R2 and p-value from the linear model:
lm_fLNA_stats <- paste("atop(R^2 ==", round(summary(lm_fLNA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_fLNA)$coefficients[,4][2]), digits = 3), ")")
fLNA_vs_prod <- ggplot(fmusk, aes(x = fLNA, y = tot_bacprod)) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") +
geom_point(size = 3, aes(shape = Lake), fill = "darkgoldenrod1") +
geom_smooth(method = "lm", color = "darkgoldenrod1", fill = "darkgoldenrod1", linetype = "longdash") +
scale_x_continuous(limits = c(0.15, 0.9), breaks = seq(0.2, 0.9, by = 0.2)) +
ylab("Bacterial Production") + xlab("Fraction LNA") +
scale_shape_manual(values = lake_shapes) +
annotate("text", x= 0.22, y=60, label=lm_fLNA_stats, parse = TRUE, color = "black", size = 3) +
mytheme + theme(legend.position = "none")
plot_grid(HNA_vs_prod + theme(legend.position = "none"),
fHNA_vs_prod,
LNA_vs_prod, fLNA_vs_prod,
labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2,
align = "h")
# Read in Data
dfHNA_musk_scores <- read.csv("FS_final/Muskegon_fs_scores_HNA_5seq10.csv") %>%
dplyr::filter(RL.score > 0.09) %>%
mutate(Lake = "Muskegon")%>%
rename(OTU = X, RL.score.HNA = RL.score) %>%
dplyr::select(OTU, RL.score.HNA)
dfLNA_musk_scores <- read.csv("FS_final/Muskegon_fs_scores_LNA_5seq10.csv") %>%
dplyr::filter(RL.score > 0.09) %>%
mutate(Lake = "Muskegon") %>%
rename(OTU = X, RL.score.LNA = RL.score) %>%
dplyr::select(OTU, RL.score.LNA)
dfHNA_inland_scores <- read.csv("FS_final/Inland_fs_scores_HNA_5seq10.csv") %>%
dplyr::filter(RL.score > 0.13) %>%
mutate(Lake = "Inland")%>%
rename(OTU = X, RL.score.HNA = RL.score) %>%
dplyr::select(OTU, RL.score.HNA)
dfLNA_inland_scores <- read.csv("FS_final/Inland_fs_scores_LNA_5seq10.csv") %>%
dplyr::filter(RL.score > 0.108) %>%
mutate(Lake = "Inland") %>%
rename(OTU = X, RL.score.LNA = RL.score) %>%
dplyr::select(OTU, RL.score.LNA)
dfHNA_michigan_scores <- read.csv("FS_final/Michigan_fs_scores_HNA_5seq10.csv") %>%
dplyr::filter(RL.score > 0.248) %>%
mutate(Lake = "Michigan")%>%
rename(OTU = X, RL.score.HNA = RL.score) %>%
dplyr::select(OTU, RL.score.HNA)
dfLNA_michigan_scores <- read.csv("FS_final/Michigan_fs_scores_LNA_5seq10.csv") %>%
dplyr::filter(RL.score > 0.286) %>%
mutate(Lake = "Michigan") %>%
rename(OTU = X, RL.score.LNA = RL.score) %>%
dplyr::select(OTU, RL.score.LNA)
# Combine HNA and LNA datasets together
muskegon_df_scores <- full_join(dfHNA_musk_scores, dfLNA_musk_scores, by = "OTU") %>%
rename(Muskegon_HNA = RL.score.HNA, Muskegon_LNA = RL.score.LNA) # Rename the column to have the lake and FCM
inland_df_scores <- full_join(dfHNA_inland_scores, dfLNA_inland_scores, by = "OTU") %>%
rename(Inland_HNA = RL.score.HNA, Inland_LNA = RL.score.LNA) # Rename the column to have the lake and FCM
michigan_df_scores <- full_join(dfHNA_michigan_scores, dfLNA_michigan_scores, by = "OTU") %>%
rename(Michigan_HNA = RL.score.HNA, Michigan_LNA = RL.score.LNA) # Rename the column to have the lake and FCM
# Combine into a full dataset
df_RLscores <-
full_join(muskegon_df_scores, inland_df_scores, by = "OTU") %>%
full_join(michigan_df_scores, by = "OTU")
matrix_RLscores <- df_RLscores %>%
tibble::column_to_rownames(var = "OTU") %>%
as.matrix()
# Replace the NA values with 0
matrix_RLscores[is.na(matrix_RLscores)] <- 0
breakers <- seq(min(matrix_RLscores, na.rm = T), max(matrix_RLscores, na.rm = T), length.out = 21)
my_palette <- colorRampPalette(c("white", "red")) (n=20)
# Set the colors of the different columns going in
colz <- c("#FF933F", "#FF933F", "#EC4863", "#EC4863", "#5C2849", "#5C2849")
heatmap.2(matrix_RLscores,
distfun = function(x) dist(x, method = "euclidean"),
hclustfun = function(x) hclust(x,method = "complete"),
col = my_palette, breaks = breakers, trace = "none",
key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
key.xlab = "RL Score",ColSideColors = colz,
lhei = c(1,9))
# Extract the top 10 OTUs
mich_top10_OTUs <- michigan_df_scores %>%
top_n(., 10, Michigan_HNA) %>%
dplyr::select(OTU)
inland_top10_HNA_OTUs <- inland_df_scores %>%
dplyr::select(OTU, Inland_HNA) %>%
top_n(., 10, Inland_HNA) %>%
dplyr::select(OTU)
inland_top10_LNA_OTUs <- inland_df_scores %>%
dplyr::select(OTU, Inland_LNA) %>%
top_n(., 10, Inland_LNA) %>%
dplyr::select(OTU)
musk_top10_HNA_OTUs <- muskegon_df_scores %>%
dplyr::select(OTU, Muskegon_HNA) %>%
top_n(., 10, Muskegon_HNA) %>%
dplyr::select(OTU)
musk_top10_LNA_OTUs <- muskegon_df_scores %>%
dplyr::select(OTU, Muskegon_LNA) %>%
top_n(., 10, Muskegon_LNA) %>%
dplyr::select(OTU)
# The above code pulls out the OTUs and we'll match this with the data from the full dataset
df_OTUs <- bind_rows(mich_top10_OTUs, inland_top10_HNA_OTUs, inland_top10_LNA_OTUs, musk_top10_HNA_OTUs, musk_top10_LNA_OTUs) %>%
unique()
# Do a filtering join
df_top10 <- df_OTUs %>%
left_join(df_RLscores, by = "OTU")
matrix_top10 <- df_top10 %>%
# Remove the zeros in the OTU names
mutate(OTU = str_replace(OTU, "00", "")) %>%
tibble::column_to_rownames(var = "OTU") %>%
as.matrix()
# The heatmap won't work with NAs
matrix_top10[is.na(matrix_top10)] <- 0
#png("Analysis_Figures/clustering_top10.png", units = "in", width = 6, height = 8, res = 300)
heatmap.2(matrix_top10,
distfun = function(x) dist(x, method = "euclidean"),
hclustfun = function(x) hclust(x,method = "complete"),
col = my_palette, breaks = breakers, trace = "none",
key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
key.xlab = "RL Score",ColSideColors = colz,
lhei = c(1.5,9), key.title=NA)
#dev.off()
# WITHOUT the clustering!
heatmap.2(matrix_top10,
dendrogram = "none", Rowv = FALSE, Colv = FALSE,
col = my_palette, breaks = breakers, trace = "none",
key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
key.xlab = "RL Score",ColSideColors = colz,
lhei = c(1,9), #lmat = rbind(1,2),
key.title=NA)
## Error in plot.new(): figure margins too large
# Only cluster the columns
heatmap.2(matrix_top10,
distfun = function(x) dist(x, method = "euclidean"),
hclustfun = function(x) hclust(x,method = "complete"),
# Do not cluster based on rows
dendrogram = "column",Rowv = FALSE,
col = my_palette, breaks = breakers, trace = "none",
key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
key.xlab = "RL Score",ColSideColors = colz,
lhei = c(1.5,9), key.title=NA)
#### PHYLOGENETIC ANALYSIS
load("data/phyloseq.RData")
physeq.otu
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 13370 taxa and 859 samples ]
## tax_table() Taxonomy Table: [ 13370 taxa by 7 taxonomic ranks ]
# With all of the OTUs that pass the RL score threshold
otu_scores_df <- matrix_RLscores %>%
as.data.frame() %>%
tibble::rownames_to_column("OTU")
# Subset the vector with names of the OTUs
vector_of_otus <- as.vector(otu_scores_df$OTU)
# Write to a file
#write(vector_of_otus, file = "data/fasttree/Figure5/OTUnames_RLscores_258.txt",
# ncolumns = 1, append = FALSE, sep = "\n")
physeq <- physeq.otu %>%
subset_taxa(., taxa_names(physeq.otu) %in% vector_of_otus)
# Which OTU is missing?
setdiff(sort(vector_of_otus), sort(taxa_names(physeq)))
## character(0)
setdiff(sort(taxa_names(physeq)), sort(vector_of_otus))
## character(0)
######################################### FASTTREE
fast_tree <- read.tree(file="data/fasttree/Figure5/newick_tree_OTUs258.tre")
## Error in file(file, "r"): cannot open the connection
fast_tree_tip_order <- data.frame(fast_tree$tip.label) %>%
rename(OTU = fast_tree.tip.label)
## Error in data.frame(fast_tree$tip.label): object 'fast_tree' not found
# Make sure the OTUs match
stopifnot(sort(vector_of_otus)==sort(as.character(fast_tree_tip_order$OTU)))
## Error in sort(vector_of_otus) == sort(as.character(fast_tree_tip_order$OTU)): object 'fast_tree_tip_order' not found
fasttree_physeq <- merge_phyloseq(physeq, phy_tree(fast_tree))
## Error in phy_tree(fast_tree): object 'fast_tree' not found
# Fix the taxonomy names
colnames(tax_table(fasttree_physeq)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
## Error in colnames(tax_table(fasttree_physeq)) <- c("Kingdom", "Phylum", : object 'fasttree_physeq' not found
###################################################################### ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(fasttree_physeq))
## Error in tax_table(fasttree_physeq): object 'fasttree_physeq' not found
Phylum <- as.character(phy$Phylum)
## Error in eval(expr, envir, enclos): object 'phy' not found
Class <- as.character(phy$Class)
## Error in eval(expr, envir, enclos): object 'phy' not found
for (i in 1:length(Phylum)){
if (Phylum[i] == "Proteobacteria"){
Phylum[i] <- Class[i] } }
## Error in eval(expr, envir, enclos): object 'Phylum' not found
phy$Phylum <- Phylum # Add the new phylum level data back to phy
## Error in eval(expr, envir, enclos): object 'Phylum' not found
t <- tax_table(as.matrix(phy))
## Error in as.matrix(phy): object 'phy' not found
tax_table(fasttree_physeq) <- t
## Error in tax_table(fasttree_physeq) <- t: object 'fasttree_physeq' not found
fasttree_tax <- data.frame(tax_table(fasttree_physeq)) %>%
tibble::rownames_to_column(var = "OTU")
## Error in tax_table(fasttree_physeq): object 'fasttree_physeq' not found
## Let's root the tree
is.rooted(fast_tree)
## Error in is.rooted(fast_tree): object 'fast_tree' not found
rooted_tree <- root(fast_tree, outgroup = "Otu001533", resolve.root = TRUE)
## Error in root(fast_tree, outgroup = "Otu001533", resolve.root = TRUE): object 'fast_tree' not found
is.rooted(rooted_tree) # Sanity Check
## Error in is.rooted(rooted_tree): object 'rooted_tree' not found
# Need to make a manual file based off of FIgure 4 - it starts with OTU names
# write.csv(df_RLscores, file = "data/fasttree/Figure5/OTUs258_MANUAL.csv", row.names = FALSE)
# Next I manually filled in the fcm_type and lake columns :/
phyfcm_fasttree_df <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
left_join(., fasttree_tax, by = "OTU") %>%
tibble::column_to_rownames("OTU") %>%
dplyr::select(Phylum, FCM)
## Error in file(file, "rt"): cannot open the connection
rooted_tree_plot <-
ggplot(rooted_tree, aes(x, y)) + geom_tree() + theme_tree() +
geom_tiplab(size=3, align=TRUE, linesize=.5) #+
## Error in ggplot(rooted_tree, aes(x, y)): object 'rooted_tree' not found
#geom_nodelab(vjust=-.5, size=3)
gheatmap(rooted_tree_plot, phyfcm_fasttree_df, offset = 0.1, width=0.3, font.size=3, colnames_angle=0, hjust=0.5) +
scale_fill_manual(values = phylum_colors,
breaks = c("Acidobacteria","Actinobacteria", "Alphaproteobacteria", "Bacteria_unclassified", "Bacteroidetes", "Betaproteobacteria",
"Chlorobi", "Chloroflexi","Cyanobacteria","Deltaproteobacteria", "Epsilonproteobacteria", "Firmicutes",
"Gammaproteobacteria","Gemmatimonadetes","Omnitrophica","Parcubacteria","Planctomycetes",
"Proteobacteria_unclassified", "TA18","unknown_unclassified", "Verrucomicrobia",
# FCM Groups
"Both", "HNA", "LNA")) +
guides(fill=guide_legend(ncol=1))
## Error in eval(lhs, parent, parent): object 'rooted_tree_plot' not found
OTU258_and_phylum <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
left_join(., fasttree_tax, by = "OTU") %>%
dplyr::select(OTU, Phylum)
OTU258_and_FCM <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
left_join(., fasttree_tax, by = "OTU") %>%
dplyr::select(OTU, FCM) %>%
rename(Phylum = FCM) # To extract from phylum_colors
itol_colors <-
phylum_colors %>%
as.list() %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column("Phylum") %>%
rename(color_name = V1) %>%
mutate(hex = col2hex(color_name)) %>%
dplyr::select(hex, Phylum)
itol_phylum_df <- left_join(OTU258_and_phylum, itol_colors, by = "Phylum") %>%
mutate(itol_label = "range") %>%
dplyr::select(OTU, itol_label, hex, Phylum)
#write.csv(itol_phylum_df, file = "data/fasttree/Figure5/itol_df.csv", row.names = FALSE)
#itol_phylum_df %>%
# dplyr::select(OTU, hex, Phylum) %>%
# write.csv(file = "data/fasttree/Figure5/itol_PHYLUM_df.csv", row.names = FALSE)
itol_FCM_df <- left_join(OTU258_and_FCM, itol_colors, by = "Phylum") %>%
rename(FCM = Phylum) %>%
dplyr::select(OTU, hex, FCM)
#write.csv(itol_FCM_df, file = "data/fasttree/Figure5/itol_FCM_df.csv", row.names = FALSE)
# FOR RL SCORE in iTOL
# Write a file for HNA
#read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
# dplyr::select(OTU, Muskegon_HNA, Inland_HNA, Michigan_HNA) %>%
# write.csv(file = "data/fasttree/Figure5/HNA_RLscores.csv", row.names = FALSE)
# Write a file for LNA
#read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
# dplyr::select(OTU, Muskegon_LNA, Inland_LNA, Michigan_LNA) %>%
# write.csv(file = "data/fasttree/Figure5/LNA_RLscores.csv", row.names = FALSE)
read.csv("data/fasttree/Figure5/LNA_RLscores.csv") %>%
dplyr::select(OTU, RL_score) %>%
rename(LNA_score = RL_score) %>%
# write.csv(file = "data/fasttree/Figure5/iTOL_LNA_RLscores.csv", row.names = FALSE)
#read.csv("data/fasttree/Figure5/HNA_RLscores.csv") %>%
# dplyr::select(OTU, RL_score) %>%
# rename(HNA_score = RL_score) %>%
# write.csv(file = "data/fasttree/Figure5/iTOL_HNA_RLscores.csv", row.names = FALSE)
LNA_lake <- read.csv("data/fasttree/Figure5/LNA_RLscores.csv") %>%
dplyr::select(OTU, lake) %>%
mutate(lake = as.character(lake)) %>%
rename(LNA_Lake = lake)
HNA_lake <- read.csv("data/fasttree/Figure5/HNA_RLscores.csv") %>%
dplyr::select(OTU, lake) %>%
mutate(lake = as.character(lake)) %>%
rename(HNA_Lake = lake)
OTU_preferred_lake <- HNA_lake %>%
left_join(LNA_lake, by = "OTU") %>%
mutate(lake = ifelse(is.na(LNA_Lake), HNA_Lake,
ifelse(is.na(HNA_Lake), LNA_Lake,
ifelse(HNA_Lake == LNA_Lake, HNA_Lake,
"Mismatch")))) %>%
dplyr::select(OTU, lake)
itol_lake_colors <- c(
"Muskegon" = "#FF933F",
"Michigan" = "#EC4863",
"Inland" = "#5C2849",
"All" = "#8b6914",
"Muskegon-Inland" = "#FFD3B5",
"Mismatch" = "#CD0000")
lake_col_df <- itol_lake_colors %>%
as.list() %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column("lake") %>%
rename(color_name = V1) %>%
mutate(color_name = as.character(color_name))
#OTU_preferred_lake %>%
# left_join(lake_col_df, by = "lake") %>%
# dplyr::select(OTU, color_name, lake) %>%
# write.csv(file = "data/fasttree/Figure5/iTOL_Lake.csv", row.names = FALSE)
# Number of OTUs per system and FCM group
num_OTUs <- c(nrow(dfHNA_musk_scores), nrow(dfLNA_musk_scores), nrow(dfHNA_inland_scores), nrow(dfLNA_inland_scores),
nrow(dfHNA_michigan_scores), nrow(dfLNA_michigan_scores))
lake_names <- as.vector(c("Muskegon", "Muskegon", "Inland", "Inland", "Michigan", "Michigan"))
fcm_names <- as.vector(c("HNA", "LNA", "HNA", "LNA", "HNA", "LNA"))
summary_OTU_df <- as.data.frame(num_OTUs) %>%
mutate(lake = c("Muskegon", "Muskegon", "Inland", "Inland", "Michigan", "Michigan"),
fcm = c("HNA", "LNA", "HNA", "LNA", "HNA", "LNA"))
summary_OTU_df
## num_OTUs lake fcm
## 1 99 Muskegon HNA
## 2 98 Muskegon LNA
## 3 55 Inland HNA
## 4 79 Inland LNA
## 5 12 Michigan HNA
## 6 9 Michigan LNA
ggplot(summary_OTU_df, aes(x = fcm, y = num_OTUs, fill = fcm)) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = fcm_colors) +
scale_y_continuous(expand = c(0,0), limits = c(0, 125)) +
labs(x = "Flow Cytometry Group", y = "Number of Selected OTUs") +
facet_grid(.~lake) + mytheme +
theme(legend.title = element_blank(),
legend.key.width = unit(0.5, "cm"),
legend.key.height = unit(0.5, "cm"))
lake_OTU_nums <- summary_OTU_df %>%
group_by(lake) %>%
summarise(sum(num_OTUs))
lake_OTU_nums
## # A tibble: 3 x 2
## lake `sum(num_OTUs)`
## <chr> <int>
## 1 Inland 134
## 2 Michigan 21
## 3 Muskegon 197
ggplot(lake_OTU_nums, aes(x = lake, y = `sum(num_OTUs)`, fill = lake)) +
geom_bar(stat = "identity", color = "black") +
labs(x = "Lake", y = "Total Number of Selected OTUs") +
scale_fill_manual(values = lake_colors) +
scale_y_continuous(expand = c(0,0), limits = c(0, 200)) +
mytheme +
theme(legend.title = element_blank(),
legend.key.width = unit(0.5, "cm"),
legend.key.height = unit(0.5, "cm"))
# How many selected OTUs in total?
nrow(phyfcm_fasttree_df)
## Error in nrow(phyfcm_fasttree_df): object 'phyfcm_fasttree_df' not found
# How many selected OTUs are in each Phylum?
phyfcm_fasttree_df %>%
mutate(OTU = row.names(.)) %>%
count(Phylum) %>%
datatable()
## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found
# How many selected OTUs within each phylum are HNA, LNA, or Both?
phyfcm_fasttree_df %>%
mutate(OTU = row.names(.)) %>%
group_by(Phylum) %>%
count(FCM) %>%
datatable()
## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found
# Bacteroidetes
bacteroidetes <- phyfcm_fasttree_df %>%
mutate(OTU = row.names(.)) %>%
dplyr::filter(Phylum == "Bacteroidetes") %>%
count(FCM) %>%
mutate(percent = n/sum(n)); bacteroidetes
## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found
## Error in eval(expr, envir, enclos): object 'bacteroidetes' not found
# Betaproteobacteria
betaproteobacteria <- phyfcm_fasttree_df %>%
mutate(OTU = row.names(.)) %>%
dplyr::filter(Phylum == "Betaproteobacteria") %>%
count(FCM) %>%
mutate(percent = n/sum(n)); betaproteobacteria
## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found
## Error in eval(expr, envir, enclos): object 'betaproteobacteria' not found
# Alphaproteobacteria
alphaproteobacteria <- phyfcm_fasttree_df %>%
mutate(OTU = row.names(.)) %>%
dplyr::filter(Phylum == "Alphaproteobacteria") %>%
count(FCM) %>%
mutate(percent = n/sum(n)); alphaproteobacteria
## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found
## Error in eval(expr, envir, enclos): object 'alphaproteobacteria' not found
# Verrucomicrobia
verrucomicrobia <- phyfcm_fasttree_df %>%
mutate(OTU = row.names(.)) %>%
dplyr::filter(Phylum == "Verrucomicrobia") %>%
count(FCM) %>%
mutate(percent = n/sum(n)); verrucomicrobia
## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found
## Error in eval(expr, envir, enclos): object 'verrucomicrobia' not found
# How many of the total OTUs were Bacteroidetes
top4_phyla <- sum(bacteroidetes$n)+sum(betaproteobacteria$n)+sum(alphaproteobacteria$n)+sum(verrucomicrobia$n)
## Error in eval(expr, envir, enclos): object 'bacteroidetes' not found
sum(top4_phyla)/nrow(phyfcm_fasttree_df) # % of total OTUs selected
## Error in eval(expr, envir, enclos): object 'top4_phyla' not found
# Percent of total selected OTUs by each phylum
sum(bacteroidetes$n)/nrow(phyfcm_fasttree_df) # % of Bacteroidetes
## Error in eval(expr, envir, enclos): object 'bacteroidetes' not found
sum(betaproteobacteria$n)/nrow(phyfcm_fasttree_df) # % of Betaproteobacteria
## Error in eval(expr, envir, enclos): object 'betaproteobacteria' not found
sum(alphaproteobacteria$n)/nrow(phyfcm_fasttree_df) # % of Alphaproteobacteria
## Error in eval(expr, envir, enclos): object 'alphaproteobacteria' not found
sum(verrucomicrobia$n)/nrow(phyfcm_fasttree_df) # % of Verrucomicrobia
## Error in eval(expr, envir, enclos): object 'verrucomicrobia' not found
# Load in the scores
LNA_scores_df <- read.csv("data/fasttree/Figure5/LNA_RLscores.csv") %>%
dplyr::select(OTU, RL_score) %>%
rename(LNA_score = RL_score)
## Error in file(file, "rt"): cannot open the connection
HNA_scores_df <- read.csv("data/fasttree/Figure5/HNA_RLscores.csv") %>%
dplyr::select(OTU, RL_score) %>%
rename(HNA_score = RL_score)
## Error in file(file, "rt"): cannot open the connection
stopifnot(dim(LNA_scores_df) == dim(HNA_scores_df))
## Error in dim(LNA_scores_df) == dim(HNA_scores_df): object 'LNA_scores_df' not found
# Put them into one data frame
summarized_RL_scores <- HNA_scores_df %>%
left_join(LNA_scores_df, by = "OTU")
## Error in eval(lhs, parent, parent): object 'HNA_scores_df' not found
# For simpliclity, make sure the vectors of OTUs match the OTU order in the phylogeny
summarized_RL_scores <- summarized_RL_scores[match(rooted_tree$tip.label, summarized_RL_scores$OTU),]
## Error in eval(expr, envir, enclos): object 'summarized_RL_scores' not found
stopifnot(rooted_tree$tip.label == summarized_RL_scores$OTU)
## Error in rooted_tree$tip.label == summarized_RL_scores$OTU: object 'rooted_tree' not found
# Load FCM data that is categorical
FCM_categorical_df <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
dplyr::select(OTU, FCM)
## Error in file(file, "rt"): cannot open the connection
FCM_categorical_df <- FCM_categorical_df[match(rooted_tree$tip.label, FCM_categorical_df$OTU),]
## Error in eval(expr, envir, enclos): object 'FCM_categorical_df' not found
stopifnot(rooted_tree$tip.label == FCM_categorical_df$OTU)
## Error in rooted_tree$tip.label == FCM_categorical_df$OTU: object 'rooted_tree' not found
####### TREEs for only HNA and LNA
# HNA: Subset all of the HNA OTUs
HNA_df_phydist <- summarized_RL_scores %>%
select(OTU, HNA_score) %>%
na.omit()
## Error in eval(lhs, parent, parent): object 'summarized_RL_scores' not found
HNAscores <- HNA_df_phydist$HNA_score
## Error in eval(expr, envir, enclos): object 'HNA_df_phydist' not found
names(HNAscores) <- HNA_df_phydist$OTU
## Error in eval(expr, envir, enclos): object 'HNA_df_phydist' not found
# Subset the larger tree to only have the HNA OTUs
HNA_rooted_tree <- ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(HNA_df_phydist$OTU, rooted_tree$tip.label)])
## Error in ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(HNA_df_phydist$OTU, : object 'rooted_tree' not found
# Make sure OTUs are in the same order for the test
HNA_df_phydist <- HNA_df_phydist[match(HNA_rooted_tree$tip.label, HNA_df_phydist$OTU),]
## Error in eval(expr, envir, enclos): object 'HNA_df_phydist' not found
stopifnot(HNA_df_phydist$OTU == HNA_rooted_tree$tip.label)
## Error in HNA_df_phydist$OTU == HNA_rooted_tree$tip.label: object 'HNA_df_phydist' not found
stopifnot(names(HNAscores) == HNA_rooted_tree$tip.label)
## Error in names(HNAscores) == HNA_rooted_tree$tip.label: object 'HNAscores' not found
# LNA: Subset all of the HNA OTUs
LNA_df_phydist <- summarized_RL_scores %>%
select(OTU, LNA_score) %>%
na.omit()
## Error in eval(lhs, parent, parent): object 'summarized_RL_scores' not found
LNAscores <- LNA_df_phydist$LNA_score
## Error in eval(expr, envir, enclos): object 'LNA_df_phydist' not found
names(LNAscores) <- LNA_df_phydist$OTU
## Error in eval(expr, envir, enclos): object 'LNA_df_phydist' not found
# Subset the larger tree to only have the LNA OTUs
LNA_rooted_tree <- ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(LNA_df_phydist$OTU, rooted_tree$tip.label)])
## Error in ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(LNA_df_phydist$OTU, : object 'rooted_tree' not found
# Make sure OTUs are in the same order for the test
LNA_df_phydist <- LNA_df_phydist[match(LNA_rooted_tree$tip.label, LNA_df_phydist$OTU),]
## Error in eval(expr, envir, enclos): object 'LNA_df_phydist' not found
stopifnot(LNA_df_phydist$OTU == LNA_rooted_tree$tip.label)
## Error in LNA_df_phydist$OTU == LNA_rooted_tree$tip.label: object 'LNA_df_phydist' not found
stopifnot(names(LNAscores) == LNA_rooted_tree$tip.label)
## Error in names(LNAscores) == LNA_rooted_tree$tip.label: object 'LNAscores' not found
# Discrete models: From Samantha Price & Roi Holzman: http://www.eve.ucdavis.edu/~wainwrightlab/Roi/Site/Teaching_files/Intro2Phylo_S4.R
discrete_FCM <- FCM_categorical_df$FCM
## Error in eval(expr, envir, enclos): object 'FCM_categorical_df' not found
names(discrete_FCM) <- FCM_categorical_df$OTU
## Error in eval(expr, envir, enclos): object 'FCM_categorical_df' not found
# Discrete Pagel's Lambda Model
discrete_lambda_fit <- fitDiscrete(rooted_tree, discrete_FCM, treeTransform = "lambda")
## Error in treedata(phy, dat): object 'discrete_FCM' not found
# To test for significant phylogenetic signal, compare the negative log likelihood when there is no signal with rescale
lambda0_tree <- rescale(rooted_tree, "lambda", 0) # A big polytomy tree
## Error in rescale(rooted_tree, "lambda", 0): object 'rooted_tree' not found
lambda0_fit <- fitDiscrete(lambda0_tree, discrete_FCM)
## Error in treedata(phy, dat): object 'discrete_FCM' not found
# Is there a significant difference? NO
# Using a likelihood ratio test
pchisq(2*(lambda0_fit$opt$lnL-discrete_lambda_fit$opt$lnL), 1, lower.tail=FALSE)
## Error in pchisq(2 * (lambda0_fit$opt$lnL - discrete_lambda_fit$opt$lnL), : object 'lambda0_fit' not found
From Roi Holzman & Samantha Price:
# # We chose HNA and LNA RL Score as the trait we are testing for phylogenetic signal, with 999 randomizations:
# Does the HNA RL Score have phylogenetic signal?
phylosig(HNA_rooted_tree, HNAscores, method = "lambda", test = TRUE, nsim = 999)
## Error in phylosig(HNA_rooted_tree, HNAscores, method = "lambda", test = TRUE, : object 'HNA_rooted_tree' not found
# Does the LNA RL Score have phylogenetic signal?
phylosig(LNA_rooted_tree, LNAscores, method = "lambda", test = TRUE, nsim = 999)
## Error in phylosig(LNA_rooted_tree, LNAscores, method = "lambda", test = TRUE, : object 'LNA_rooted_tree' not found
# HNA
phylosig(tree = rooted_tree, x = HNAscores, method = "K", test = TRUE, nsim = 999)
## Error in phylosig(tree = rooted_tree, x = HNAscores, method = "K", test = TRUE, : object 'rooted_tree' not found
# LNA
phylosig(tree = rooted_tree, x = LNAscores, method = "K", test = TRUE, nsim = 999)
## Error in phylosig(tree = rooted_tree, x = LNAscores, method = "K", test = TRUE, : object 'rooted_tree' not found
# HNA: Calculate the pairwise distances of the OTUs in the HNA phylogeny
HNA_rooted_phy_dist <- cophenetic(HNA_rooted_tree)
## Error in cophenetic(HNA_rooted_tree): object 'HNA_rooted_tree' not found
# Do HNA OTUs have phylogenetic autocorrelation with the Moran's I test?
moran_HNA_test <- abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, method = "Abouheif", nrepet = 999)
## Error in abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, : object 'HNA_rooted_phy_dist' not found
moran_HNA_test
## Error in eval(expr, envir, enclos): object 'moran_HNA_test' not found
# LNA: Calculate the pairwise distances of the OTUs in the HNA phylogeny
LNA_rooted_phy_dist <- cophenetic(LNA_rooted_tree)
## Error in cophenetic(LNA_rooted_tree): object 'LNA_rooted_tree' not found
# Do LNA OTUs have phylogenetic autocorrelation with the Moran's I test?
moran_LNA_test <- abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, method = "Abouheif", nrepet = 999)
## Error in abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, : object 'LNA_rooted_phy_dist' not found
moran_LNA_test
## Error in eval(expr, envir, enclos): object 'moran_LNA_test' not found
# HNA: Perform an Abouheif moran test using Monte Carlo simulations (default is 999 randomizations)
abouheif_HNA_test <- abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, method = "oriAbouheif", nrepet = 999)
## Error in abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, : object 'HNA_rooted_phy_dist' not found
abouheif_HNA_test
## Error in eval(expr, envir, enclos): object 'abouheif_HNA_test' not found
# LNA: Perform an Abouheifmoran test using Monte Carlo simulations (default is 999 randomizations)
abouheif_LNA_test <- abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, method = "oriAbouheif", nrepet = 999)
## Error in abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, : object 'LNA_rooted_phy_dist' not found
abouheif_LNA_test
## Error in eval(expr, envir, enclos): object 'abouheif_LNA_test' not found
devtools::session_info() # This will include session info with all R package version information
## setting value
## version R version 3.5.1 (2018-07-02)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Toronto
## date 2018-08-14
##
## package * version date source
## ade4 * 1.7-11 2018-04-05 CRAN (R 3.5.0)
## adegenet 2.1.1 2018-02-02 CRAN (R 3.5.0)
## adephylo * 1.1-11 2017-12-18 CRAN (R 3.5.0)
## animation 2.5 2017-03-30 CRAN (R 3.5.0)
## ape * 5.1 2018-04-04 CRAN (R 3.5.0)
## assertthat 0.2.0 2017-04-11 CRAN (R 3.5.0)
## backports 1.1.2 2017-12-13 CRAN (R 3.5.0)
## base * 3.5.1 2018-07-05 local
## base64enc 0.1-3 2015-07-28 CRAN (R 3.5.0)
## bindr 0.1.1 2018-03-13 CRAN (R 3.5.0)
## bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.5.0)
## Biobase 2.41.2 2018-07-18 Bioconductor
## BiocGenerics 0.27.1 2018-06-17 Bioconductor
## biomformat 1.9.0 2018-06-13 Bioconductor
## Biostrings 2.49.1 2018-08-04 Bioconductor
## bitops 1.0-6 2013-08-17 CRAN (R 3.5.0)
## boot 1.3-20 2017-08-06 CRAN (R 3.5.1)
## broom 0.5.0 2018-07-17 CRAN (R 3.5.0)
## caTools 1.17.1.1 2018-07-20 CRAN (R 3.5.0)
## cellranger 1.1.0 2016-07-27 CRAN (R 3.5.0)
## cli 1.0.0 2017-11-05 CRAN (R 3.5.0)
## cluster 2.0.7-1 2018-04-13 CRAN (R 3.5.1)
## clusterGeneration 1.3.4 2015-02-18 CRAN (R 3.5.0)
## coda 0.19-1 2016-12-08 CRAN (R 3.5.0)
## codetools 0.2-15 2016-10-05 CRAN (R 3.5.1)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.5.0)
## combinat 0.0-8 2012-10-29 CRAN (R 3.5.0)
## compiler 3.5.1 2018-07-05 local
## cowplot * 0.9.3 2018-07-15 CRAN (R 3.5.0)
## crayon 1.3.4 2017-09-16 CRAN (R 3.5.0)
## crosstalk 1.0.0 2016-12-21 CRAN (R 3.5.0)
## d3heatmap * 0.6.1.2 2018-02-01 CRAN (R 3.5.0)
## data.table 1.11.4 2018-05-27 CRAN (R 3.5.0)
## datasets * 3.5.1 2018-07-05 local
## deldir 0.1-15 2018-04-01 CRAN (R 3.5.0)
## deSolve 1.21 2018-05-09 cran (@1.21)
## devtools * 1.13.6 2018-06-27 CRAN (R 3.5.0)
## digest 0.6.15 2018-01-28 CRAN (R 3.5.0)
## dplyr * 0.7.6 2018-06-29 CRAN (R 3.5.1)
## DT * 0.4 2018-01-30 CRAN (R 3.5.0)
## evaluate 0.11 2018-07-17 CRAN (R 3.5.0)
## expm 0.999-2 2017-03-29 CRAN (R 3.5.0)
## fansi 0.2.3 2018-05-06 CRAN (R 3.5.0)
## fastmatch 1.1-0 2017-01-28 CRAN (R 3.5.0)
## forcats * 0.3.0 2018-02-19 CRAN (R 3.5.0)
## foreach 1.4.4 2017-12-12 CRAN (R 3.5.0)
## gdata 2.18.0 2017-06-06 CRAN (R 3.5.0)
## geiger * 2.0.6 2015-09-07 CRAN (R 3.5.0)
## ggplot2 * 3.0.0 2018-07-03 CRAN (R 3.5.0)
## ggtree * 1.13.2 2018-07-19 Bioconductor
## glue 1.3.0 2018-07-17 CRAN (R 3.5.0)
## gmodels 2.18.1 2018-06-25 CRAN (R 3.5.0)
## gplots * 3.0.1 2016-03-30 CRAN (R 3.5.0)
## graphics * 3.5.1 2018-07-05 local
## grDevices * 3.5.1 2018-07-05 local
## grid 3.5.1 2018-07-05 local
## gtable 0.2.0 2016-02-26 CRAN (R 3.5.0)
## gtools 3.8.1 2018-06-26 CRAN (R 3.5.0)
## haven 1.1.2 2018-06-27 CRAN (R 3.5.0)
## hms 0.4.2 2018-03-10 CRAN (R 3.5.0)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.5.0)
## htmlwidgets 1.2 2018-04-19 CRAN (R 3.5.0)
## httpuv 1.4.5 2018-07-19 CRAN (R 3.5.0)
## httr 1.3.1 2017-08-20 CRAN (R 3.5.0)
## igraph 1.2.2 2018-07-27 CRAN (R 3.5.0)
## IRanges 2.15.16 2018-07-18 Bioconductor
## iterators 1.0.10 2018-07-13 CRAN (R 3.5.0)
## jsonlite 1.5 2017-06-01 CRAN (R 3.5.0)
## KernSmooth 2.23-15 2015-06-29 CRAN (R 3.5.1)
## knitr 1.20 2018-02-20 CRAN (R 3.5.0)
## labeling 0.3 2014-08-23 CRAN (R 3.5.0)
## later 0.7.3 2018-06-08 CRAN (R 3.5.0)
## lattice 0.20-35 2017-03-25 CRAN (R 3.5.1)
## lazyeval 0.2.1 2017-10-29 CRAN (R 3.5.0)
## LearnBayes 2.15.1 2018-03-18 CRAN (R 3.5.0)
## lubridate 1.7.4 2018-04-11 CRAN (R 3.5.0)
## magrittr 1.5 2014-11-22 CRAN (R 3.5.0)
## maps * 3.3.0 2018-04-03 CRAN (R 3.5.0)
## MASS 7.3-50 2018-04-30 CRAN (R 3.5.1)
## Matrix 1.2-14 2018-04-13 CRAN (R 3.5.1)
## memoise 1.1.0 2017-04-21 CRAN (R 3.5.0)
## methods * 3.5.1 2018-07-05 local
## mgcv 1.8-24 2018-06-23 CRAN (R 3.5.1)
## mime 0.5 2016-07-07 CRAN (R 3.5.0)
## mnormt 1.5-5 2016-10-15 CRAN (R 3.5.0)
## modelr 0.1.2 2018-05-11 CRAN (R 3.5.0)
## msm 1.6.6 2018-02-02 CRAN (R 3.5.0)
## multtest 2.37.0 2018-06-13 Bioconductor
## munsell 0.5.0 2018-06-12 CRAN (R 3.5.0)
## mvtnorm 1.0-8 2018-05-31 cran (@1.0-8)
## nlme 3.1-137 2018-04-07 CRAN (R 3.5.1)
## numDeriv 2016.8-1 2016-08-27 CRAN (R 3.5.0)
## parallel 3.5.1 2018-07-05 local
## permute 0.9-4 2016-09-09 CRAN (R 3.5.0)
## phangorn 2.4.0 2018-02-15 CRAN (R 3.5.0)
## phylobase 0.8.4 2018-07-06 Github (fmichonneau/phylobase@0f4db8f)
## phyloseq * 1.25.2 2018-07-15 Bioconductor
## phytools * 0.6-44 2017-11-12 CRAN (R 3.5.0)
## pillar 1.3.0 2018-07-14 CRAN (R 3.5.0)
## pkgconfig 2.0.1 2017-03-21 CRAN (R 3.5.0)
## plotrix 3.7-2 2018-05-27 CRAN (R 3.5.0)
## plyr 1.8.4 2016-06-08 CRAN (R 3.5.0)
## png 0.1-7 2013-12-03 CRAN (R 3.5.0)
## prettyunits 1.0.2 2015-07-13 cran (@1.0.2)
## progress 1.2.0 2018-06-14 cran (@1.2.0)
## promises 1.0.1 2018-04-13 CRAN (R 3.5.0)
## purrr * 0.2.5 2018-05-29 CRAN (R 3.5.0)
## quadprog 1.5-5 2013-04-17 CRAN (R 3.5.0)
## R6 2.2.2 2017-06-17 CRAN (R 3.5.0)
## Rcpp 0.12.18 2018-07-23 CRAN (R 3.5.0)
## readr * 1.1.1 2017-05-16 CRAN (R 3.5.0)
## readxl 1.1.0 2018-04-20 CRAN (R 3.5.0)
## reshape2 1.4.3 2017-12-11 CRAN (R 3.5.0)
## rhdf5 2.25.4 2018-06-13 Bioconductor
## Rhdf5lib 1.3.1 2018-06-13 Bioconductor
## rlang 0.2.1 2018-05-30 cran (@0.2.1)
## rmarkdown 1.10 2018-06-11 CRAN (R 3.5.0)
## rncl 0.8.3 2018-07-27 CRAN (R 3.5.0)
## RNeXML 2.1.1 2018-05-07 cran (@2.1.1)
## rprojroot 1.3-2 2018-01-03 CRAN (R 3.5.0)
## rstudioapi 0.7 2017-09-07 CRAN (R 3.5.0)
## rvcheck 0.1.0 2018-05-23 CRAN (R 3.5.0)
## rvest 0.3.2 2016-06-17 CRAN (R 3.5.0)
## S4Vectors 0.19.19 2018-07-18 Bioconductor
## scales 0.5.0 2017-08-24 CRAN (R 3.5.0)
## scatterplot3d 0.3-41 2018-03-14 CRAN (R 3.5.0)
## seqinr 3.4-5 2017-08-01 CRAN (R 3.5.0)
## shiny 1.1.0 2018-05-17 CRAN (R 3.5.0)
## sp 1.3-1 2018-06-05 CRAN (R 3.5.0)
## spData 0.2.9.3 2018-08-01 CRAN (R 3.5.0)
## spdep 0.7-7 2018-04-03 CRAN (R 3.5.0)
## splines 3.5.1 2018-07-05 local
## stats * 3.5.1 2018-07-05 local
## stats4 3.5.1 2018-07-05 local
## stringi 1.2.4 2018-07-20 CRAN (R 3.5.0)
## stringr * 1.3.1 2018-05-10 CRAN (R 3.5.0)
## subplex 1.5-4 2018-04-05 cran (@1.5-4)
## survival 2.42-6 2018-07-13 CRAN (R 3.5.0)
## tibble * 1.4.2 2018-01-22 CRAN (R 3.5.0)
## tidyr * 0.8.1 2018-05-18 CRAN (R 3.5.0)
## tidyselect 0.2.4 2018-02-26 CRAN (R 3.5.0)
## tidytree 0.1.9 2018-06-13 CRAN (R 3.5.0)
## tidyverse * 1.2.1 2017-11-14 CRAN (R 3.5.0)
## tools 3.5.1 2018-07-05 local
## treeio 1.5.2 2018-07-19 Bioconductor
## utf8 1.1.4 2018-05-24 CRAN (R 3.5.0)
## utils * 3.5.1 2018-07-05 local
## uuid 0.1-2 2015-07-28 cran (@0.1-2)
## vegan 2.5-2 2018-05-17 CRAN (R 3.5.0)
## withr 2.1.2 2018-03-15 CRAN (R 3.5.0)
## XML 3.98-1.13 2018-08-01 CRAN (R 3.5.0)
## xml2 1.2.0 2018-01-24 CRAN (R 3.5.0)
## xtable 1.8-2 2016-02-05 CRAN (R 3.5.0)
## XVector 0.21.3 2018-06-23 Bioconductor
## yaml 2.2.0 2018-07-25 CRAN (R 3.5.0)
## zlibbioc 1.27.0 2018-06-13 Bioconductor